## ----setseedch5, echo = FALSE--------------------------------------------
set.seed(1982)

knitr::opts_chunk$set(
                      echo = FALSE,
                      warning = FALSE,
                      message = FALSE
)


## ----loadlibsch5---------------------------------------------------------
library(reshape2)
library(dplyr)
library(pander)
library(scales)
library(ggplot2)
library(ggrepel)
library(seriation)
library(stringr)
library(lme4)
library(arm)
library(broom)
library(rstanarm)

most_common_or_random <- function(x) {
    thetab <- sort(table(x), decreasing = TRUE)
    if (thetab[1] == thetab[2]) {
        retval <- sample(names(thetab), size = 1)
    } else {
        retval <- names(thetab)[1]
    }
    return(retval)
}

source("./lib/common_funcs.R")


## ----dataprep------------------------------------------------------------
if (!file.exists("cache/panel_selection_data.rds")) {
    load("./data/ch2_data.RData")
### Suppose we exclude Anderson?
### sheet1 <- subset(sheet1, nJudges > 3)
    roles <- read.csv("./office_holders.csv")
    roles$Start <- as.Date(roles$Start)
    roles$End <- as.Date(roles$End)
### Task: get a list of all the judges who could have sat on a case
### Strategy: generate all combinations of judges and cases
### knock out judges who were appointed after the case was heard
### or who retired before the case was heard
    nJudgesEver <- 20
    dat <- expand.grid(Judge = 1:nJudgesEver,
                       NEUTRAL = unique(sheet1$NEUTRAL))
    old <- nrow(dat)
    dat <- merge(dat,
                 subset(roles, Role == "Member"),
                 all = TRUE)
    new <- nrow(dat)
    stopifnot(old == new)
    dat$Role <- NULL
    old <- nrow(dat)
    dat <- merge(dat,
                 unique(sheet1[,c("NEUTRAL", "HEARINGDATE1")]),
                 all = TRUE)
    new <- nrow(dat)
    stopifnot(old == new)

### Add in a hearing date for Anderson
    dat$HEARINGDATE1[which(dat$NEUTRAL == "[2012] UKSC 7")] <- as.Date("2012-02-01")
    dat <- subset(dat, Start < HEARINGDATE1)
    dat <- subset(dat, End > HEARINGDATE1)
    nrow(dat)


    maxJudges <- 9
    participation <- sheet1[,c("NEUTRAL",
                               paste0("JUDGE", 1:maxJudges))]
    participation <- melt(participation, id.var = "NEUTRAL", value.name = "Judge")
    participation <- unique(participation)
    participation$variable <- NULL
    participation <- subset(participation, !is.na(Judge))
    participation$chosen <- 1

    old <- nrow(dat)
    dat <- merge(dat, participation,
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)
    dat$chosen[is.na(dat$chosen)] <- 0


### Independent variables
### specialism
    spec$Name <- NULL

    spec <- melt(spec, id.vars = c("Judge"),
                 variable.name = "Area",
                 value.name = "Specialism")
    spec$Area <- as.character(spec$Area)
    spec$Area[which(spec$Area == "NI")] <- "N Ireland"
    spec$Area[which(spec$Area == "Tax.and.Chancery")] <- "Chancery"


    area.df <- unique(sheet1[,c("NEUTRAL", "Area")])
    area.df <- area.df %>%
        group_by(NEUTRAL) %>%
        summarize(Area = most_common_or_random(Area))

    old <- nrow(dat)
    dat <- merge(dat, area.df,
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)
    old <- nrow(dat)
    dat <- merge(dat, spec,
                 by = c("Judge", "Area"),
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

### workload
    old <- nrow(dat)
    dat <- merge(dat, individualWorkload,
                 by.x = c("HEARINGDATE1","Judge"),
                 by.y = c("Date","Judge"),
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

### importance by office holding
    old <- nrow(dat)
    importance <- sheet1[,c("NEUTRAL", "Importance")]
    importance <- importance %>%
        group_by(NEUTRAL) %>%
        summarize(Importance = max(Importance, na.rm = TRUE))
    dat <- merge(dat, importance,
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

    rm(importance)
    dat$officeHolder <- 0
    for (i in which(roles$Role %in% c("President", "Vice-President"))) {
        matchpos <- which(dat$Judge == roles$Judge[i] &
                          dat$HEARINGDATE1 > roles$Start[i] &
                          dat$HEARINGDATE1 < roles$End[i])
        dat$officeHolder[matchpos] <- 1
    }

### Seniority
    dat$Seniority <- (dat$HEARINGDATE1 - dat$Start) / 365.25
    dat$firstSixMonths <- as.numeric((dat$HEARINGDATE1 - dat$Start) < 180)
    dat$lastSixMonths <- as.numeric((dat$End - dat$HEARINGDATE1) < 180)
    dat$Start <- dat$End <- dat$Role <- NULL

### agreement with senior judges
### Strategy: get all the hearing dates
### subtract one,
### get agreement on cases up to that date
    get_agreement <- function(df) {
        holder <- list()
        for (i in 1:9) {
            holder[[i]] <- df[,c("NEUTRAL", "CASEID","HANDDOWN",paste0("JUDGE",i),paste0("JUDGE",i,"DISS"))]	
            names(holder[[i]]) <- c("NEUTRAL", "CASEID","HANDDOWN","JUDGE","DISS")
        }
        holder <- do.call("rbind", holder)
        holder <- subset(holder, !is.na(JUDGE))
        holder$DISS <- ifelse(holder$DISS == "C", 1, 0)

        holder <- holder %>%
            group_by(NEUTRAL) %>%
            mutate(w8 = 1 / length(unique(CASEID)))

### Create matrix
        votemat <- acast(holder, NEUTRAL + CASEID + HANDDOWN ~ JUDGE, 
                         value.var = "DISS")
        votemat[is.na(votemat)] <- 0

        holder$value <- 1
        presencemat <- acast(holder, NEUTRAL + CASEID + HANDDOWN ~ JUDGE, 
                             value.var = "value")
        presencemat[is.na(presencemat)] <- 0

### Create weights
        w8 <- apply(acast(holder, NEUTRAL + CASEID + HANDDOWN ~ JUDGE, 
                          value.var = "w8"),
                    1,
                    function(x) unique(na.omit(x)))

### Sort the first two columns of a dataframe
### and remove duplicates
        dedupe <- function(d) {
            pasteorder <- apply(d[,1:2], 1,
                                function(x) paste0(sort(x), collapse = "/"))
### Take the second one of the two
            d <- d[which(duplicated(pasteorder)),]
            d <- d[which(d[,1] != d[,2]),]
            return(d)
        }
        res <- list()
        for (w in unique(holder$w8)) {
            v <- votemat[which(w8 == w),]
            p <- presencemat[which(w8 == w),]
            agree <- crossprod(v, v)
            sat <- crossprod(p, p)
            agree <- melt(agree,
                          varnames = c("judge1","judge2"),
                          value.name = "agree")
            sat <- melt(sat,
                        varnames = c("judge1","judge2"),
                        value.name = "sat")
            agree <- dedupe(agree)
            sat <- dedupe(sat)
            pos <- which(unique(holder$w8) == w)
            res[[pos]] <- merge(agree, sat, all = T)
            res[[pos]]$agree <- res[[pos]]$agree * w
            res[[pos]]$sat <- res[[pos]]$sat * w
        }
        res <- do.call("rbind", res)
        res <- res %>%
            group_by(judge1, judge2) %>%
            summarize(agree = sum(agree),
                      sat = sum(sat))
        res$prop <- res$agree / res$sat
        return(res)
    }

### For all of the dates
    holder <- list()
    for (d in unique(as.character(dat$HEARINGDATE1))) {
        d <- as.Date(d)
        votes_until_then <- subset(sheet1, HEARINGDATE1 < d)
        agreement <- get_agreement(votes_until_then)
        class(agreement) <- "data.frame"
        the_president <- roles$Judge[which(roles$Role == "President" & roles$Start < d & roles$End > d)]
        agreement_with_pres <- subset(agreement, judge1 == the_president)
        agreement_with_pres$date <- format(d,"%Y%m%d")
        agreement_with_pres$Judge <- agreement_with_pres$judge2
        agreement_with_pres$judge2 <- agreement_with_pres$judge1 <- NULL
        holder[[which(unique(dat$HEARINGDATE1) == d)]] <- agreement_with_pres
    }
    agreement_with_pres <- do.call("rbind", holder)
    agreement_with_pres$date <- as.Date(agreement_with_pres$date, format = "%Y%m%d")
    old <- nrow(dat)
    dat <- merge(dat, agreement_with_pres,
                 by.x = c("HEARINGDATE1", "Judge"),
                 by.y = c("date", "Judge"),
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

### If information is missing, it's
### most often because we're dealing with someone who has
### holding office
    dat$prop[is.na(dat$prop)] <- 1


### OpinionBelow
    sheet1$Prob <- .51 + (0.5 - sheet1$OpinionBelow)
    prob.df <- unique(sheet1[,c("Prob", "NEUTRAL")])
    prob.df <- prob.df %>%
        group_by(NEUTRAL) %>%
        summarize(Prob = mean(Prob, na.rm = TRUE))
    old <- nrow(dat)
    dat <- merge(dat, prob.df,
                 by.x = c("NEUTRAL"),
                 by.y = c("NEUTRAL"),
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

### HR and Devolution issues
    hr_dev <- unique(sheet1[,c("NEUTRAL", "HR", "DEVOLUTION")])
    hr_dev$HR <- as.numeric(hr_dev$HR == "Yes")
    hr_dev$DEVOLUTION <- as.numeric(hr_dev$DEVOLUTION == "Yes")
    hr_dev <- hr_dev %>%
        group_by(NEUTRAL) %>%
        summarize(HR = max(HR),
                  DEVOLUTION = max(DEVOLUTION))
    old <- nrow(dat)
    dat <- merge(dat, hr_dev,
                 by.x = c("NEUTRAL"),
                 by.y = c("NEUTRAL"),
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

### Combined Workload
    old <- nrow(dat)
    dat <- merge(dat, combWorkload,
                 by.x = c("HEARINGDATE1"),
                 by.y = c("Date"),
                 all.x = TRUE,
                 all.y = FALSE,
                 suffixes = c(".judge", ".court"))
    new <- nrow(dat)
    stopifnot(old == new)


### Who did the PTA?
    pta <- read.csv("pta_data.csv")
    pta.m <- melt(pta, id.vars = c("CASEID"),
                  measure.vars = paste0("Justice",1:5),
                  value.name = "Judge")
    pta.m <- merge(pta.m, sheet1[,c("CASEID", "NEUTRAL")],
                   all.x = FALSE,
                   all.y = TRUE)
    pta.m$CASEID <- pta.m$variable <- NULL
    pta.m$PTAd <- 1
    pta.m <- subset(pta.m, !is.na(Judge))
    old <- nrow(dat)
    dat <- merge(dat, unique(pta.m),
                 by = c("NEUTRAL","Judge"),
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

    dat$PTAd[is.na(dat$PTAd)] <- 0

### nJudges
    old <- nrow(dat)
    dat <- merge(dat, unique(sheet1[,c("NEUTRAL", "nJudges")]),
                 by = "NEUTRAL",
                 all.x = TRUE,
                 all.y = FALSE)
    new <- nrow(dat)
    stopifnot(old == new)

    dat$nJudges <- factor(as.character(dat$nJudges),
                          levels = as.character(c(3, 5,7,9)),
                          ordered = FALSE)

    saveRDS(dat, file = "cache/panel_selection_data.rds")

} else {
    dat <- readRDS("cache/panel_selection_data.rds")
}


## ----descriptives--------------------------------------------------------
### For each judge, getting the total number of cases in which they participated,
### the total number in which they could have participated, and (hence)
### the rate at which they participated
plot.df <- dat %>%
    group_by(Judge) %>%
    summarize(nPossible = n(), nActual = sum(chosen)) %>%
    mutate(Rate = nActual / nPossible)

plot.df <- plot.df[order(plot.df$Rate),]
plot.df <- subset(plot.df, Judge %in% 1:20)
plot.df$JudgeName <- num2judge(plot.df$Judge)
plot.df$JudgeName <- factor(plot.df$JudgeName,
                            levels = plot.df$JudgeName,
                            ordered = TRUE)
plot.df$custom.cex <- cscale(plot.df$nActual, area_pal()) / 2


## ----satplot, fig = TRUE, fig.cap = "Rate at which judges sit on cases, calculated as the number of cases heard by the judge divided by the total number of cases heard during that judge's time in office. Plotted points are in proportion to the number of cases heard. Solid grey line indicates the average across judges. ", fig.width = 6, fig.height = 6----

ggplot(plot.df, aes(x = JudgeName, y = Rate, size = nActual)) +
    geom_point() +
    scale_x_discrete("Judge") +
    scale_y_continuous("Rate\n(cases sat on divided by\ncases during tenure)",
                       labels = scales::percent_format(accuracy = 1)) + 
    scale_size_continuous("Number of cases") +
    geom_hline(yintercept = mean(plot.df$Rate), col = "darkgrey") + 
    coord_flip() + 
    theme_uksc()




## ----satbytype, fig = TRUE, fig.cap = "Proportion of caseload of cases of different areas, relative to average (mean) caseload. Highlighted bars in red indicate areas where a given judge's caseload is significantly higher or lower in that area than the caseload of the reference judge (Lord Neuberger). ", fig.width = 6, fig.height = 7----
current.judges <- c(6, 8, 10, 11, 13, 14:20)
current.judges <- 1:20

plot.df <- subset(dat, chosen == 1) %>%
    filter(Judge %in% current.judges) %>% 
    group_by(Judge, Area) %>%
    summarize(Cases = n()) %>%
    group_by(Judge) %>%
    mutate(Proportion = Cases / sum(Cases),
           Total = sum(Cases)) %>%
    group_by(Area) %>%
    mutate(Rank = rank(-Proportion))

### For each area, run a regression, save dummies
plot.df$tmp <- factor(as.character(plot.df$Judge))
plot.df$tmp <- relevel(plot.df$tmp, "13")
hilite.df <- plot.df %>%
    group_by(Area) %>%
    do(tidy(glm(cbind(Cases, Total - Cases) ~ factor(tmp),
                 data = .,
                family = binomial))) %>%
    filter(term != "(Intercept)") %>%
    mutate(Judge = sub("factor(tmp)", "", term, fixed = TRUE),
           highlight = p.value < 0.05) %>%
    dplyr::select(Judge, Area, highlight)

plot.df <- merge(plot.df, hilite.df,
                 all = TRUE)
plot.df$highlight[is.na(plot.df$highlight)] <- FALSE

cormat <- acast(plot.df, Judge ~ Area, value.var = "Proportion")
ord <- seriate(cor(cormat, use = "pairwise"))
my.areas <- colnames(cormat)[as.vector(ord[[1]])]

### What if we get rank?
### De-mean
plot.df <- plot.df %>%
    group_by(Area) %>%
    mutate(Proportion.dm = Proportion - mean(Proportion, na.rm = TRUE))

### We would like to know whether to highlight the disparity


### Re-order levels so they're in order
plot.df$Area <- factor(plot.df$Area,
                       levels = my.areas)

### Re-order judges
my.levels <- subset(plot.df, Area == "Public") %>% arrange(Proportion) %>% pull(Judge)
my.levels <- num2judge(my.levels)
plot.df$Judge.name <- factor(num2judge(plot.df$Judge),
                             levels = my.levels,
                             ordered = TRUE)

ggplot(plot.df, aes(x = Area, y = Proportion.dm, fill = highlight)) +
    facet_wrap(~Judge.name, ncol = 4, nrow = 5) +
    scale_fill_manual("",
                      values = c("darkgrey", "darkred"),
                      guide = "none") +
    scale_x_discrete("") +
    scale_y_continuous("Proportion relative to average",
                       labels = scales::percent_format(accuracy = 1)) + 
    geom_bar(stat = "identity") +
    coord_flip() +
    theme_uksc()



## ----shapeup-------------------------------------------------------------
### Are there cases where a judge was both chosen and not chosen

### Go through each case
combdf <- lapply(unique(dat$NEUTRAL), function(n) {
### expand.grid to create combinations
    chosen_judges <- which(dat$chosen == 1 & dat$NEUTRAL == n)
    chosen_judges <- dat$Judge[chosen_judges]
    unchosen_judges <- which(dat$chosen == 0 & dat$NEUTRAL == n)
    unchosen_judges <- dat$Judge[unchosen_judges]
    tmp <- expand.grid(NEUTRAL = n,
                       JudgeA = unique(chosen_judges),
                       JudgeB = unique(unchosen_judges),
                       AprefB = 1)
### shuffle?
    shuffle <- (rbinom(n = 14, size = 1, prob = 0.5) > 0)
    old <- tmp
    new <- tmp
    new$JudgeA[shuffle] <- old$JudgeB[shuffle] 
    new$JudgeB[shuffle] <- old$JudgeA[shuffle] 
    new$AprefB[shuffle] <- 0
    
### merge with that judges' info
    rm(tmp, old)
    jvars <- c("Specialism", "workloadTotal.judge",
               "officeHolder", "prop",
               "PTAd", "Seniority")
    new <- merge(new, subset(dat, NEUTRAL == n)[,c("Judge", jvars)],
                 by.x = c("JudgeA"),
                 by.y = "Judge",
                 all.x = TRUE,
                 all.y = FALSE)
    new <- merge(new, subset(dat, NEUTRAL == n)[,c("Judge", jvars)],
                 by.x = c("JudgeB"),
                 by.y = "Judge",
                 all.x = TRUE,
                 all.y = FALSE,
                 suffixes = c(".A", ".B"))
### create the differenced variables
    for (j in jvars) {
        outvar <- paste0(j, ".diff")
        new[, outvar] <- new[, paste0(j, ".A")] -
            new[, paste0(j, ".B")]
    }
### Add in Importance as a moderator
    new$Importance <- unique(dat$Importance[which(dat$NEUTRAL == n)])
    ### Create a weighting variable
    new$w8 <- length(chosen_judges) / nrow(new)
    return(new)
})

combdf <- do.call("rbind", combdf)




## ----smrystatsch5--------------------------------------------------------
specbar <- round(mean(combdf$Specialism.diff, na.rm = TRUE), 2)
specsd <- round(sd(combdf$Specialism.diff, na.rm = TRUE), 2)

workbar <- round(mean(combdf$workloadTotal.judge.diff, na.rm = TRUE), 2)
worksd <- round(sd(combdf$workloadTotal.judge.diff, na.rm = TRUE), 2)

senbar <- round(mean(as.numeric(combdf$Seniority.diff), na.rm = TRUE), 2)
sensd <- round(sd(as.numeric(combdf$Seniority.diff), na.rm = TRUE), 2)
prefbar <- round(mean(as.numeric(combdf$prop.diff), na.rm = TRUE), 2)
prefsd <- round(sd(as.numeric(combdf$prop.diff), na.rm = TRUE), 2)



## ----models--------------------------------------------------------------
combdf.bak <- combdf
vars <- c("Importance", "Specialism.diff",
          "workloadTotal.judge.diff",
          "Seniority.diff",
          "prop.diff")

means_std_vars <- numeric()
sd_std_vars <- numeric()
for (v in vars) {
    means_std_vars <- c(means_std_vars,
                        mean(combdf[, v], na.rm = TRUE))
    sd_std_vars <- c(sd_std_vars,
                     sd(combdf[, v], na.rm = TRUE))
    combdf[,v] <- arm::rescale(as.numeric(combdf[, v]))
}

names(means_std_vars) <- names(sd_std_vars) <- vars

if (file.exists("cache/05_cached_results.RData")) {
    load("cache/05_cached_results.RData")
} else {
    legalmod <- glmer(AprefB ~ Specialism.diff * Importance +
                          (1|NEUTRAL),
                      data = combdf,
                      family = binomial)
    
    legalmod.rstanarm <- stan_glmer(formula(legalmod),
                                    data = combdf,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)

    orgmod <- glmer(AprefB ~ (workloadTotal.judge.diff +
                              PTAd.diff +
                              Seniority.diff +
                              officeHolder.diff) * Importance +
                             (1|NEUTRAL),
                    family = binomial,
                    data = combdf)
    
    orgmod.rstanarm <- stan_glmer(formula(orgmod),
                                  data = combdf,
                                  family = binomial,
                                  chains = 2, cores = 2, seed = 1982)
    
    polmod <- glmer(AprefB ~ prop.diff * Importance + (1|NEUTRAL),
                    family = binomial,
                    data = combdf)
    
    polmod.rstanarm <- stan_glmer(formula(polmod),
                                  data = combdf,
                                  family = binomial,
                                  chains = 2,
                                  cores = 2,
                                  seed = 1982)
    
    combmod <- glmer(AprefB ~ Specialism.diff * Importance + 
                         officeHolder.diff * Importance +
                         prop.diff * Importance +
                         Seniority.diff * Importance + 
                         workloadTotal.judge.diff * Importance +
                         PTAd.diff * Importance + 
                         (1|NEUTRAL),
                     data = combdf,
                     family = binomial)
    
    combmod.rstanarm <- stan_glmer(formula(combmod),
                                   data = combdf,
                                   family = binomial,
                                   chains = 2, cores = 2, seed = 1982)
    
    save(list = c("legalmod",
                  "legalmod.rstanarm",
                  "orgmod",
                  "orgmod.rstanarm",
                  "polmod",
                  "polmod.rstanarm",
                  "combmod",
                  "combmod.rstanarm"),
     file = "cache/05_cached_results.RData")
}


## ----legalplot, fig = TRUE, fig.cap = "Legal model of panel selection. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals.", fig.width = 6, fig.height = 2.5----
## Figure height should be what, (nCoefs + 2) / 2.5?
legalmod.labs <- list("(Intercept)" = "Intercept",
                      "Specialism.diff" = "Difference in specialisation",
                      "Specialism.diff:Importance" = "Difference in specialisation × importance",
                      "Importance" = "Importance")

mycoefplot(legalmod.rstanarm,
           ylab = "Coefficient value",
           drop = "Importance",
           sec.ylab = "Odds are ... times greater",
           variable.labels = legalmod.labs)

spec <- tidy(legalmod.rstanarm) %>%
    filter(term == "Specialism.diff") %>%
    mutate(estimate = round(exp(estimate))) %>% 
    pull(estimate)



## ----orgplot, fig = TRUE, fig.cap = "Organisational model of panel selection. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 4.5----
## Figure height should be what, (nCoefs + 2) / 2.5?
orgmod.labs <- list("(Intercept)" = "Intercept",
                    "PTAd.diff" = "One judge sat on PTA panel",
                    "officeHolder.diff" = "One judge is (Deputy) President",
                    "workloadTotal.judge.diff" = "Difference in workloads",
                    "Seniority.diff:Importance" = "Difference in seniority × importance",
                    "Importance" = "Importance",
                    "officeHolder.diff:Importance" = "One judge is (Deputy) President × importance",
                    "workloadTotal.judge.diff:Importance" = "Difference in workloads × importance",
                    "PTAd.diff:Importance" = "One judge sat on PTA panel × importance",
                    "Seniority.diff" = "Difference in seniority")

mycoefplot(orgmod.rstanarm,
           ylab = "Coefficient value",
           drop = "Importance",
           sec.ylab = "Odds are ... times greater",
           variable.labels = orgmod.labs)

ptad <- tidy(orgmod.rstanarm) %>%
    filter(term == "PTAd.diff") %>%
    mutate(estimate = round(exp(estimate), 2)) %>% 
    pull(estimate)



## ----polplot, fig = TRUE, fig.cap = "Political model of panel selection. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals.", fig.width = 6, fig.height = 2.5----
## Figure height should be what, (nCoefs + 2) / 2.5?
polmod.labs <- list("Importance" = "Importance",
                    "prop.diff" = "Difference in rate of agreement with President",
                    "prop.diff:Importance" = "Difference in rate of agreement × importance")

mycoefplot(polmod.rstanarm,
           ylab = "Coefficient value",
           drop = "Importance",
           sec.ylab = "Odds are ... times greater",
           variable.labels = polmod.labs)



## ----combplot, fig = TRUE, fig.cap = "Combined model of panel selection. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals.", fig.width = 6, fig.height = 6----
## Figure height should be what, (nCoefs + 2) / 2.5?
combmod.labs <- c(legalmod.labs, orgmod.labs, polmod.labs)
combmod.labs <- combmod.labs[!duplicated(combmod.labs)]
### Add on interaction terms with reverse order
addons <- combmod.labs
names(addons) <- gsub("(.*):(.*)", "\\2:\\1", names(combmod.labs))
combmod.labs <- c(combmod.labs, addons)

mycoefplot(combmod.rstanarm,
           drop = "Importance",
           ylab = "Coefficient value",
           sec.ylab = "Odds are ... times greater",
           variable.labels = combmod.labs)



## ----preds, results = "asis", eval = TRUE, fig = TRUE, fig.cap = "Changes in the probability of selection when comparing two otherwise identical judges", fig.width = 5, fig.height = 6----
predictprob <- function(mod, newdata) {
    if (inherits(mod, "stanreg")) {
        res <- posterior_linpred(mod, newdata = newdata, draws = 1000)
    } else { ## it is glm or glmer
        require(arm)
        beta <- arm::sim(mod, n.sims = 1000)
        beta <- beta@fixef
        form <- formula(mod)
        form <- update(form, . ~ . - (1|NEUTRAL) - (1|JudgeA))
        form <- form[-2]
        mm <- model.matrix(form, newdata)
        res <- numeric(0)
        for (i in 1:nrow(beta)) {
            res <- c(res,
                     mm %*% beta[i,])
        }
    }

    res <- plogis(res)
    return(res)

}

cf <- function(mod, old, new, alpha = 0.05) {
    oldprobs <- predictprob(mod, old)
    newprobs <- predictprob(mod, new)
    ## Get the effect by sim, obs.
    me <- newprobs - oldprobs
    ## Average across obs.
    ame <- rowMeans(me)
    ## Get quantities across sims
    return(data.frame(mean = mean(ame),
                      lo = quantile(ame, alpha/2),
                      hi = quantile(ame, 1 - alpha / 2),
                      llo = quantile(ame, 1/12),
                      hhi = quantile(ame, 11/12)))
}


### What do we want?
### Change: low importance, high importance
low_imp <- within(combdf, Importance <- min(combdf$Importance))
high_imp <- within(combdf, Importance <- max(combdf$Importance))

### Workload
the.change <- 10 / 
    sd_std_vars["workloadTotal.judge.diff"] +
    means_std_vars["workloadTotal.judge.diff"]

workload.on.lo <- workload.off.lo <- low_imp
workload.off.lo$workloadTotal.judge.diff <- mean(combdf$workloadTotal.judge.diff)
workload.on.lo$workloadTotal.judge.diff <- mean(combdf$workloadTotal.judge.diff) +
    the.change

workload.chg.lo <- cf(combmod.rstanarm,
                      workload.off.lo,
                      workload.on.lo)


workload.on.hi <- workload.off.hi <- high_imp
workload.off.hi$workloadTotal.judge.diff <- mean(combdf$workloadTotal.judge.diff)
workload.on.hi$workloadTotal.judge.diff <- mean(combdf$workloadTotal.judge.diff) +
    the.change

workload.chg.hi <- cf(combmod.rstanarm,
                      workload.off.hi,
                      workload.on.hi)

### PTAd
the.change <- 1
PTAd.on.lo <- PTAd.off.lo <- low_imp
PTAd.off.lo$PTAd.diff <- 0
PTAd.on.lo$PTAd.diff <- 0 +
    the.change

PTAd.chg.lo <- cf(combmod.rstanarm, PTAd.off.lo, PTAd.on.lo)

PTAd.on.hi <- PTAd.off.hi <- high_imp
PTAd.off.hi$PTAd.diff <- 0
PTAd.on.hi$PTAd.diff <- 0 +
    the.change

PTAd.chg.hi <- cf(combmod.rstanarm, PTAd.off.hi, PTAd.on.hi)

                                        # Office-holder
the.change <- 1
office.on.lo <- office.off.lo <- low_imp
office.off.lo$officeHolder.diff <- 0
office.on.lo$officeHolder.diff <- 0 +
    the.change

office.chg.lo <- cf(combmod.rstanarm, office.off.lo, office.on.lo)

office.on.hi <- office.off.hi <- high_imp
office.off.hi$officeHolder.diff <- 0
office.on.hi$officeHolder.diff <- 0 +
    the.change

office.chg.hi <- cf(combmod.rstanarm, office.off.hi, office.on.hi)

### seniority
Seniority.chg.lo <- low_imp
the.change <- 3.5 / 
    sd_std_vars["Seniority.diff"] +
    means_std_vars["Seniority.diff"]

seniority.on.lo <- seniority.off.lo <- low_imp
seniority.off.lo$Seniority.diff <- 0
seniority.on.lo$Seniority.diff <- 0 +
    the.change

seniority.chg.lo <- cf(combmod.rstanarm,
                       seniority.off.lo,
                       seniority.on.lo)

seniority.on.hi <- seniority.off.hi <- high_imp
seniority.off.hi$Seniority.diff <- 0
seniority.on.hi$Seniority.diff <- 0 +
    the.change

seniority.chg.hi <- cf(combmod.rstanarm,
                       seniority.off.hi,
                       seniority.on.hi)

### specialism
the.change <- (2/3) / 
    sd_std_vars["Specialism.diff"] +
    means_std_vars["Specialism.diff"]


specialism.on.lo <- specialism.off.lo <- low_imp
specialism.off.lo$Specialism.diff <- 0
specialism.on.lo$Specialism.diff <- 0 +
    the.change

specialism.chg.lo <- cf(combmod.rstanarm,
                       specialism.off.lo,
                       specialism.on.lo)

specialism.on.hi <- specialism.off.hi <- high_imp
specialism.off.hi$Specialism.diff <- 0
specialism.on.hi$Specialism.diff <- 0 +
    the.change

specialism.chg.hi <- cf(combmod.rstanarm,
                       specialism.off.hi,
                       specialism.on.hi)


### agreement
the.change <- 0.25 / 
    sd_std_vars["prop.diff"] +
    means_std_vars["prop.diff"]


prop.on.lo <- prop.off.lo <- low_imp
prop.off.lo$prop.diff <- 0
prop.on.lo$prop.diff <- 0 +
    the.change

prop.chg.lo <- cf(combmod.rstanarm,
                       prop.off.lo,
                       prop.on.lo)

prop.on.hi <- prop.off.hi <- high_imp
prop.off.hi$prop.diff <- 0
prop.on.hi$prop.diff <- 0 +
    the.change

prop.chg.hi <- cf(combmod.rstanarm,
                       prop.off.hi,
                       prop.on.hi)


### How to combine all these?
out <- rbind(workload.chg.lo, workload.chg.hi,
             PTAd.chg.lo, PTAd.chg.hi,
             office.chg.lo, office.chg.hi,
             seniority.chg.lo, seniority.chg.hi,
             specialism.chg.lo, specialism.chg.hi,
             prop.chg.lo, prop.chg.hi)

out <- as.data.frame(out)
changes <- c("10 days more workload...",
             "Sat on PTA panel",
             "Office-holder",
             "3.5 years more senior",
             "Specialist (e.g., from 0 to 2/3rds)",
             "25% higher rate of agreement")

changes <- str_wrap(changes, 20)

out$Change <- rep(changes, each = 2)

out$Importance <- rep(c("Low importance", "High importance"),
                      times = length(changes))
out$Importance <- factor(out$Importance,
                         levels = c("Low importance", "High importance"),
                         ordered = TRUE)

dodge.width <- 0.9

## Change bar colors
out$isSignificant <- apply(out[,c("lo", "hi")], 1, isSig)

out$inner.bar.col <- ifelse(out$isSignificant,
                            inner.bar.col,
                            "lightgrey")
out$outer.bar.col <- ifelse(out$isSignificant,
                            outer.bar.col,
                            "darkgrey")

ggplot(out, aes(x = Change,
                y = mean, ymin = lo, ymax = hi,
                group = Importance)) +
    scale_x_discrete("") +
    scale_y_continuous("Change in\nprobability of being selected\n(percentage points)",
                       labels = scales::percent) +
    geom_linerange(aes(colour = inner.bar.col),
                   position = position_dodge(width = dodge.width),
                   size=inner.bar.size, 
                   alpha = 0.8) +
    geom_linerange(aes(ymin = llo, ymax = hhi,
                       colour = outer.bar.col),
                   position = position_dodge(width = dodge.width),
                   size = outer.bar.size,
                   alpha = 0.8) +
    geom_hline(aes(x = 0, yintercept = 0), lty = 1) +
    geom_point(aes(colour = outer.bar.col, shape = Importance),
               position = position_dodge(width = dodge.width),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               stroke = 1.5) +
    scale_shape_manual("Importance", values = my.pchs) +
    scale_colour_identity() +
    scale_fill_identity() +
    theme_uksc() +
    coord_flip() +
    theme(legend.position = "bottom",
          legend.dir = "horizontal")






## ----fitstatsch5, fig = TRUE, fig.cap = "Goodness-of-fit statistics for different models. Thin lines indicate 95 percent credible intervals; thick lines indicate 83 percent credible intervals. ", warning = FALSE----

epcp.rstanarm <- function(mod) {
    
    poslp <- posterior_linpred(mod)
    sumdf <- as.data.frame(summary(mod)) 
    preds <- (poslp > 0.5)
    
    actual <- matrix(mod$y,
                     nrow = nrow(poslp),
                     ncol = length(mod$y),
                     byrow = TRUE)
    correct <- (preds == actual)

    correct <- apply(correct, 1, mean)
    epcp <- data.frame(term = "PCP\n(bigger is better)",
                       mean = mean(correct),
                       lo = quantile(correct, 0.025),
                       hi = quantile(correct, 0.975),
                       llo = quantile(correct, 1/12),
                       hhi = quantile(correct, 11/12))

    the.loo <- loo(mod)
    the.loo <- data.frame(term = "LOOIC\n(smaller is better)",
                          mean = the.loo$looic,
                          lo = the.loo$looic + qnorm(1/40) * the.loo$se_looic,
                          hi = the.loo$looic + qnorm(39/40) * the.loo$se_looic,
                          llo = the.loo$looic + qnorm(1/24) * the.loo$se_looic,
                          hhi = the.loo$looic + qnorm(23/24) * the.loo$se_looic)
                                        #    return(rbind(epcp, the.loo))
    return(rbind(epcp, the.loo))
}

if (file.exists("cache/05_gof.RData")) {
    load("cache/05_gof.RData")
} else { 
    
    legal.stats <- epcp.rstanarm(legalmod.rstanarm)
    org.stats <- epcp.rstanarm(orgmod.rstanarm)
    pol.stats <- epcp.rstanarm(polmod.rstanarm)
    comb.stats <- epcp.rstanarm(combmod.rstanarm)
    
    legal.stats$Model <- "Legal"
    org.stats$Model <- "Organisational"
    pol.stats$Model <- "Political"
    comb.stats$Model <- "Combined"

    null.stats <- data.frame(Model = "Null",
                             term = "PCP\n(bigger is better)",
                             mean = mean(combmod.rstanarm$y == 1, na.rm = TRUE))
    
    gof.df <- rbind(
        rbind(
            rbind(legal.stats,
                  org.stats),
            pol.stats),
        comb.stats)
    
    gof.df <- merge(gof.df, null.stats, all = TRUE)
    
    gof.df$Model <- factor(gof.df$Model,
                           levels = rev(c("Null", "Legal",
                                          "Organisational", "Political",
                                          "Combined")),
                           ordered = TRUE)
    legal.loo <- loo(legalmod.rstanarm)
    combined.loo <- loo(combmod.rstanarm)
    political.loo <- loo(polmod.rstanarm)
    org.loo <- loo(orgmod.rstanarm)
    
    save(list = c("legal.loo", "combined.loo",
                  "political.loo", "org.loo",
                  "gof.df"),
         file = "cache/05_gof.RData")
}

ggplot(gof.df, aes(x = Model, y = mean, ymax = hi, ymin = lo)) +
    geom_linerange(size = inner.bar.size,
                   colour = inner.bar.col,
                   alpha = 0.8) +
        geom_linerange(aes(ymin = llo, ymax = hhi),
                       size = outer.bar.size,
                       colour = outer.bar.col,
                       alpha = 0.8) +
    geom_point(aes(colour = outer.bar.col),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               shape= my.pchs[1],
               stroke = 1.5) +
    scale_colour_identity() + 
    facet_wrap(~term, scales = "free_x") +
    scale_y_continuous("Value") +
    coord_flip() +
    theme_uksc()

combined.pcp <- subset(gof.df,
                       Model == "Combined" & grepl("PCP", term)) %>%
    pull(mean)

combined.pcp <- round(100 * combined.pcp)


